{
 "cells": [
  {
   "cell_type": "code",
   "metadata": {
    "collapsed": true,
    "ExecuteTime": {
     "end_time": "2025-01-07T12:36:49.573364Z",
     "start_time": "2025-01-07T12:36:49.104626Z"
    }
   },
   "source": [
    "import pandas as pd"
   ],
   "outputs": [],
   "execution_count": 1
  },
  {
   "cell_type": "markdown",
   "source": [
    "# 2 Series"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "source": [
    "# 生成一个Series\n",
    "\n",
    "ser_obj = pd.Series(range(0,20,2),dtype='int32') #默认索引是0-9\n",
    "print(ser_obj) #打印输出会带有类型\n"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2025-01-07T12:51:20.211979Z",
     "start_time": "2025-01-07T12:51:20.207550Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0     0\n",
      "1     2\n",
      "2     4\n",
      "3     6\n",
      "4     8\n",
      "5    10\n",
      "6    12\n",
      "7    14\n",
      "8    16\n",
      "9    18\n",
      "dtype: int32\n"
     ]
    }
   ],
   "execution_count": 26
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-01-07T12:39:19.373003Z",
     "start_time": "2025-01-07T12:39:19.368244Z"
    }
   },
   "cell_type": "code",
   "source": [
    "print('-'*50)\n",
    "# 获取数据\n",
    "print(ser_obj.values)  #values实际是ndarray\n",
    "print(type(ser_obj.values)) #类型是ndarray\n",
    "# 获取索引\n",
    "print(ser_obj.index)  #内部自带的类型--RangeIndex(指的是数组上标号的范围，左闭右开 )"
   ],
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "--------------------------------------------------\n",
      "[ 0  2  4  6  8 10 12 14 16 18]\n",
      "<class 'numpy.ndarray'>\n",
      "RangeIndex(start=0, stop=10, step=1)\n"
     ]
    }
   ],
   "execution_count": 6
  },
  {
   "metadata": {},
   "cell_type": "markdown",
   "source": ""
  },
  {
   "cell_type": "code",
   "source": [
    "print(ser_obj[0]) \n",
    "ser_obj[3]\n",
    "# 访问不存在的索引下标会报keyerror(不一定要print）"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2025-01-07T12:53:57.877029Z",
     "start_time": "2025-01-07T12:53:57.872411Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "np.int32(6)"
      ]
     },
     "execution_count": 32,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "execution_count": 32
  },
  {
   "cell_type": "code",
   "source": [
    "print(ser_obj * 1.5)  #元素级乘法\n",
    "print(ser_obj > 6) #返回一个bool序列\n",
    "#通过广播机制，可以对Series进行算术运算"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2025-01-07T12:41:25.763235Z",
     "start_time": "2025-01-07T12:41:25.747326Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0     0.0\n",
      "1     3.0\n",
      "2     6.0\n",
      "3     9.0\n",
      "4    12.0\n",
      "5    15.0\n",
      "6    18.0\n",
      "7    21.0\n",
      "8    24.0\n",
      "9    27.0\n",
      "dtype: float64\n",
      "0    False\n",
      "1    False\n",
      "2    False\n",
      "3    False\n",
      "4     True\n",
      "5     True\n",
      "6     True\n",
      "7     True\n",
      "8     True\n",
      "9     True\n",
      "dtype: bool\n"
     ]
    }
   ],
   "execution_count": 11
  },
  {
   "cell_type": "code",
   "source": [
    "#字典变为series，索引是字典的key，value是字典的value，感受非默认索引\n",
    "\n",
    "year_data = {2001: 17.8, 2005: 20.1, 2003: 16.5}\n",
    "ser_obj2 = pd.Series(year_data)\n",
    "print(ser_obj2)\n",
    "print('-'*50)\n",
    "print(ser_obj2.index)\n",
    "print('-'*50)\n",
    "print(ser_obj2[2001])\n",
    "ser_obj2.values"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2025-01-07T12:42:56.335744Z",
     "start_time": "2025-01-07T12:42:56.319564Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2001    17.8\n",
      "2005    20.1\n",
      "2003    16.5\n",
      "dtype: float64\n",
      "--------------------------------------------------\n",
      "Index([2001, 2005, 2003], dtype='int64')\n",
      "--------------------------------------------------\n",
      "17.8\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "array([17.8, 20.1, 16.5])"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "execution_count": 13
  },
  {
   "cell_type": "code",
   "source": [
    "#有点鸡肋\n",
    "print(ser_obj2.name) #Series名字\n",
    "print(ser_obj2.index.name)  #索引名字\n",
    "ser_obj2.name = 'temp'\n",
    "ser_obj2.index.name = 'year1'\n",
    "print('-'*50)\n",
    "print(ser_obj2.head())  #head默认显示前5行\n"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2025-01-07T12:42:59.014369Z",
     "start_time": "2025-01-07T12:42:59.010597Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "None\n",
      "None\n",
      "--------------------------------------------------\n",
      "year1\n",
      "2001    17.8\n",
      "2005    20.1\n",
      "2003    16.5\n",
      "Name: temp, dtype: float64\n"
     ]
    }
   ],
   "execution_count": 14
  },
  {
   "cell_type": "markdown",
   "source": [
    "# 3 DataFrame"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "source": [
    "import numpy as np\n",
    "\n",
    "# 通过ndarray构建DataFrame\n",
    "t = pd.DataFrame(np.arange(12).reshape((3,4))) #默认索引是0-2\n",
    "print(t)\n",
    "print('-'*50)\n"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2025-01-07T12:46:49.563168Z",
     "start_time": "2025-01-07T12:46:49.557288Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "   0  1   2   3\n",
      "0  0  1   2   3\n",
      "1  4  5   6   7\n",
      "2  8  9  10  11\n",
      "--------------------------------------------------\n"
     ]
    }
   ],
   "execution_count": 18
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-01-07T12:48:15.389600Z",
     "start_time": "2025-01-07T12:48:15.375023Z"
    }
   },
   "cell_type": "code",
   "source": [
    "array = np.random.randn(5,4)#z=随机的正态分布，n=5*4\n",
    "print(array)\n",
    "print('-'*50)\n",
    "df_obj = pd.DataFrame(array)\n",
    "print(df_obj.head()) #默认显示前5行"
   ],
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[[-0.72937663 -1.44773976 -0.4462934  -0.03141468]\n",
      " [-0.8420151   1.39409288 -1.5165711   0.28321444]\n",
      " [-0.80330383 -0.11642986  0.9158005   1.61636243]\n",
      " [ 0.36607774 -0.49723561  1.22344942 -0.26353924]\n",
      " [ 2.23366382 -0.29623649 -0.4051966   0.8843238 ]]\n",
      "--------------------------------------------------\n",
      "          0         1         2         3\n",
      "0 -0.729377 -1.447740 -0.446293 -0.031415\n",
      "1 -0.842015  1.394093 -1.516571  0.283214\n",
      "2 -0.803304 -0.116430  0.915800  1.616362\n",
      "3  0.366078 -0.497236  1.223449 -0.263539\n",
      "4  2.233664 -0.296236 -0.405197  0.884324\n"
     ]
    }
   ],
   "execution_count": 19
  },
  {
   "cell_type": "code",
   "source": "df_obj.loc[0] #单独把某一行取出来,类型是series",
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2025-01-07T12:48:21.750754Z",
     "start_time": "2025-01-07T12:48:21.746354Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0   -0.729377\n",
       "1   -1.447740\n",
       "2   -0.446293\n",
       "3   -0.031415\n",
       "Name: 0, dtype: float64"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "execution_count": 20
  },
  {
   "cell_type": "code",
   "source": [
    "# 列表套字典  变df\n",
    "d2 =[{\"name\" : \"xiaohong\" ,\"age\" :32,\"tel\" :10010},\n",
    "     { \"name\": \"xiaogang\" ,\"tel\": 10000} ,\n",
    "     {\"name\":\"xiaowang\" ,\"age\":22}]\n",
    "df6=pd.DataFrame(d2)\n",
    "print(df6) #缺失值会用NaN填充\n",
    "print(type(df6.values)) #ndarray"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2025-01-07T12:49:02.050228Z",
     "start_time": "2025-01-07T12:49:02.040255Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "       name   age      tel\n",
      "0  xiaohong  32.0  10010.0\n",
      "1  xiaogang   NaN  10000.0\n",
      "2  xiaowang  22.0      NaN\n",
      "<class 'numpy.ndarray'>\n"
     ]
    }
   ],
   "execution_count": 21
  },
  {
   "cell_type": "code",
   "source": [
    "import random\n",
    "x=[]\n",
    "for i in range(0,4):\n",
    "    x.append(random.randint(0,20))\n",
    "pd.Series(x, index=list(range(3,7)),dtype='int32')#生成Series，索引是.Series(值{如果为列表，也可以成立},索引,类型)"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2025-01-07T13:03:36.784041Z",
     "start_time": "2025-01-07T13:03:36.778263Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "3    17\n",
       "4     6\n",
       "5    15\n",
       "6     1\n",
       "dtype: int32"
      ]
     },
     "execution_count": 50,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "execution_count": 50
  },
  {
   "cell_type": "code",
   "source": [
    "#df中不同列可以是不同的数据类型,同一列必须是一个数据类型\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "dict_data = {'A': 1,\n",
    "             'B': pd.Timestamp('20190926'),\n",
    "             'C': pd.Series(1, index=list(range(4)),dtype='float32'),\n",
    "             'D': np.array([1,2,3,4],dtype='int32'),\n",
    "             'E': [\"Python\",\"Java\",\"C++\",\"C\"],\n",
    "             'F': 'wangdao' }\n",
    "df_obj2 = pd.DataFrame(dict_data)\n",
    "print(df_obj2)\n"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2025-01-07T13:03:54.851Z",
     "start_time": "2025-01-07T13:03:54.832809Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "   A          B    C  D       E        F\n",
      "0  1 2019-09-26  1.0  1  Python  wangdao\n",
      "1  1 2019-09-26  1.0  2    Java  wangdao\n",
      "2  1 2019-09-26  1.0  3     C++  wangdao\n",
      "3  1 2019-09-26  1.0  4       C  wangdao\n"
     ]
    }
   ],
   "execution_count": 51
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-01-07T06:10:38.983232Z",
     "start_time": "2025-01-07T06:10:38.956132Z"
    }
   },
   "cell_type": "code",
   "source": [
    "print('-'*50)\n",
    "print(df_obj2.index) #行索引,重点\n",
    "#补课改变\n",
    "# df_obj2.index[0]=2  不可以单独修改某个索引值\n",
    "print(df_obj2.columns) #列索引，重点\n",
    "df_obj2.dtypes #每一列的数据类型，重点"
   ],
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "--------------------------------------------------\n",
      "Index([0, 1, 2, 3], dtype='int64')\n",
      "Index(['A', 'B', 'C', 'D', 'E', 'F'], dtype='object')\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "A            int64\n",
       "B    datetime64[s]\n",
       "C          float32\n",
       "D            int32\n",
       "E           object\n",
       "F           object\n",
       "dtype: object"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "execution_count": 19
  },
  {
   "cell_type": "code",
   "source": [
    "# 感受日期,初始化df，设置行索引，列索引\n",
    "dates = pd.date_range('20130101', periods=8) #默认freq='D'，即天（period指的是时间跨度，每次一天一天加）\n",
    "df = pd.DataFrame(np.random.randn(8, 4), index=dates, columns=list('ABCD'))\n",
    "print(df)\n",
    "print('-'*50)\n",
    "print(df.index)\n"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2025-01-07T13:05:09.586119Z",
     "start_time": "2025-01-07T13:05:09.579625Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "                   A         B         C         D\n",
      "2013-01-01  1.096445 -1.422888 -1.056432  1.246961\n",
      "2013-01-02 -1.943737 -0.575866  1.291056 -0.159179\n",
      "2013-01-03  0.249685  0.915606  0.280536  0.548222\n",
      "2013-01-04 -2.622612  0.116374  0.606068  0.154860\n",
      "2013-01-05 -1.258433  0.227251  0.022726 -1.728390\n",
      "2013-01-06 -1.988355 -0.295499 -0.872251  1.563891\n",
      "2013-01-07  1.031710  1.144261 -0.412902 -1.180126\n",
      "2013-01-08  1.056824 -2.215794 -0.067220 -1.283112\n",
      "--------------------------------------------------\n",
      "DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',\n",
      "               '2013-01-05', '2013-01-06', '2013-01-07', '2013-01-08'],\n",
      "              dtype='datetime64[ns]', freq='D')\n"
     ]
    }
   ],
   "execution_count": 53
  },
  {
   "cell_type": "code",
   "source": [
    "#取数据\n",
    "print(df_obj2)\n",
    "print('-'*50)\n",
    "print(type(df_obj2))\n",
    "print('-'*50)\n",
    "#pd中使用索引名来取某一行，或者列\n",
    "print(df_obj2['B'])\n",
    "print('-'*50)\n",
    "#把df的某一列取出来是series\n",
    "print(type(df_obj2['B']))"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2025-01-07T13:06:26.458835Z",
     "start_time": "2025-01-07T13:06:26.452603Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "   A          B    C  D       E        F\n",
      "0  1 2019-09-26  1.0  1  Python  wangdao\n",
      "1  1 2019-09-26  1.0  2    Java  wangdao\n",
      "2  1 2019-09-26  1.0  3     C++  wangdao\n",
      "3  1 2019-09-26  1.0  4       C  wangdao\n",
      "--------------------------------------------------\n",
      "<class 'pandas.core.frame.DataFrame'>\n",
      "--------------------------------------------------\n",
      "0   2019-09-26\n",
      "1   2019-09-26\n",
      "2   2019-09-26\n",
      "3   2019-09-26\n",
      "Name: B, dtype: datetime64[s]\n",
      "--------------------------------------------------\n",
      "<class 'pandas.core.series.Series'>\n"
     ]
    }
   ],
   "execution_count": 57
  },
  {
   "cell_type": "code",
   "source": [
    "#增加列数据，列名是自定义的\n",
    "df_obj2['G'] = df_obj2['D'] + 4\n",
    "print(df_obj2.head())"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2025-01-07T13:06:20.121007Z",
     "start_time": "2025-01-07T13:06:20.105047Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "   A          B    C  D       E        F  G\n",
      "0  1 2019-09-26  1.0  1  Python  wangdao  5\n",
      "1  1 2019-09-26  1.0  2    Java  wangdao  6\n",
      "2  1 2019-09-26  1.0  3     C++  wangdao  7\n",
      "3  1 2019-09-26  1.0  4       C  wangdao  8\n"
     ]
    }
   ],
   "execution_count": 55
  },
  {
   "cell_type": "code",
   "source": [
    "# 删除列\n",
    "del(df_obj2['G'])\n",
    "print(df_obj2.head())\n",
    "\n"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2025-01-07T13:06:22.443101Z",
     "start_time": "2025-01-07T13:06:22.429032Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "   A          B    C  D       E        F\n",
      "0  1 2019-09-26  1.0  1  Python  wangdao\n",
      "1  1 2019-09-26  1.0  2    Java  wangdao\n",
      "2  1 2019-09-26  1.0  3     C++  wangdao\n",
      "3  1 2019-09-26  1.0  4       C  wangdao\n"
     ]
    }
   ],
   "execution_count": 56
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}
